;
; Ullrich von Bassewitz, 17.08.1998
;
; CC65 runtime: division for long unsigned ints
;

       	.export		tosudiveax, getlop, udiv32
	.import		addysp1
	.importzp	sp, sreg, tmp3, tmp4, ptr1, ptr2, ptr3, ptr4

; tosudiveax: unsigned 32 bit division, left operand / right operand.
;
; In:   right operand in A (byte 0), X (byte 1) and sreg (bytes 2 and 3),
;       left operand as four bytes at (sp); getlop fetches it from there
;       and drops it from the stack.
; Out:  quotient in A (byte 0), X (byte 1) and sreg (bytes 2 and 3).

tosudiveax:
	jsr	getlop		; Get the parameters
 	jsr	udiv32		; Do the division
 	lda	ptr1		; Result is in ptr1:sreg
 	ldx	ptr1+1		; sreg already holds the high word
 	rts

; getlop: move both operands of a long division into their work cells.
;
; In:   right operand in A (byte 0), X (byte 1) and sreg (bytes 2 and 3),
;       left operand as four bytes at (sp), low byte first.
; Out:  ptr3 (low word) and ptr4 (high word) hold the right operand,
;       ptr1 (low word) and sreg (high word) hold the left operand.
;       Leaves through addysp1 with Y = 3 to drop the left operand
;       (addysp1 is imported; presumably it releases Y+1 = 4 bytes).
;
; The signed division routines use this entry point as well.

getlop:
	stx	ptr3+1		; Right operand, byte 1
	sta	ptr3		; Right operand, byte 0
	ldy	sreg+1		; Y is free until the stack index is set up,
	sty	ptr4+1		; so use it to copy the high word: byte 3 ...
	ldy	sreg
	sty	ptr4		; ... and byte 2

	ldy	#$00		; Index of the first left operand byte
	lda	(sp),y
	sta	ptr1		; Left operand, byte 0
	iny
	lda	(sp),y
	sta	ptr1+1		; Left operand, byte 1
	iny
	lda	(sp),y
	sta	sreg		; Left operand, byte 2
	iny
	lda	(sp),y
	sta	sreg+1		; Left operand, byte 3
	jmp	addysp1		; Drop the left operand, return to our caller

; udiv32: unsigned 32 by 32 bit division, done by shift and subtract.
; Do (ptr1:sreg) / (ptr3:ptr4) --> (ptr1:sreg), remainder in (ptr2:tmp3:tmp4)
;
; In:   ptr1 (low word) and sreg (high word) hold the dividend,
;       ptr3 (low word) and ptr4 (high word) hold the divisor.
; Out:  ptr1 (low word) and sreg (high word) hold the quotient,
;       ptr2 (bytes 0 and 1), tmp3 (byte 2) and tmp4 (byte 3) hold the
;       remainder. A holds byte 0 of the remainder, Y is zero.
; Uses: one byte of hardware stack inside the loop. X is not touched.
;
; No check for a zero divisor is made here.
; This is also the entry point for the signed division

udiv32:	lda	#0		; A is byte 0 of the remainder during the loop
	sta	ptr2+1		; Clear bytes 1..3 of the remainder
	sta	tmp3
	sta	tmp4
; 	sta	ptr1+1
 	ldy	#32		; One pass per bit of the dividend
L0:	asl	ptr1		; Shift dividend left, bit 0 (result bit) = 0
	rol	ptr1+1
	rol	sreg
	rol	sreg+1
	rol	a		; Top bit of the dividend enters the remainder
	rol	ptr2+1
	rol	tmp3
	rol	tmp4

; Trial subtraction remainder - divisor. Only the final carry and the high
; byte are kept, because we do not have enough space to store the
; intermediate result, so we may have to do the subtraction twice.

	pha			; Save byte 0 of the remainder
	cmp	ptr3		; Byte 0: cmp subtracts without needing sec
	lda	ptr2+1
	sbc	ptr3+1
	lda	tmp3
	sbc	ptr4
	lda	tmp4
	sbc	ptr4+1
	bcc	L1		; Borrow: remainder < divisor, result bit is 0

; Remainder >= divisor, do the subtraction again, this time store the result

        sta    	tmp4            ; We have the high byte already
	pla			; Byte 0 of the remainder
	sbc	ptr3		; byte 0 (carry is set, the bcc was not taken)
	pha			; Keep it on the stack for the pla at L1
	lda    	ptr2+1
	sbc	ptr3+1
	sta	ptr2+1		; byte 1
	lda	tmp3
	sbc	ptr4
	sta	tmp3  		; byte 2
	inc	ptr1  		; Set result bit

L1:	pla			; A = byte 0 of the remainder again
	dey
	bne	L0
	sta	ptr2		; Store byte 0 of the remainder
	rts


